{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Generate Panlex Word Translation Dictionary\n",
        "This notebook generates word-translation dictionaries from the PanLex database and saves them as pickle files."
      ],
      "metadata": {
        "id": "DFIwjJci8gxZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "metadata": {
        "id": "VS61D8SUlVNy",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d056fed8-44d0-40ca-d370-38bcf6fe8b8c"
      },
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import os, pickle\n",
        "import pandas as pd\n",
        "os.chdir(\"/content/drive/My Drive/Colab Notebooks/panlex-experiment/\")"
      ],
      "metadata": {
        "id": "jVw82syZlVP4"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# -nc (no-clobber): skip the ~1.2 GB download when the archive is already present.\n",
        "!wget -nc https://db.panlex.org/panlex-20230401-csv.zip"
      ],
      "metadata": {
        "id": "kMZvKrp8lVSM",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "7df04a7b-06b5-45db-c4ac-4f23bcc2b4a7"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2023-05-06 03:08:56--  https://db.panlex.org/panlex-20230401-csv.zip\n",
            "Resolving db.panlex.org (db.panlex.org)... 208.70.31.123\n",
            "Connecting to db.panlex.org (db.panlex.org)|208.70.31.123|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 1274847653 (1.2G) [application/zip]\n",
            "Saving to: ‘panlex-20230401-csv.zip’\n",
            "\n",
            "panlex-20230401-csv 100%[===================>]   1.19G   845KB/s    in 27m 37s \n",
            "\n",
            "2023-05-06 03:36:34 (751 KB/s) - ‘panlex-20230401-csv.zip’ saved [1274847653/1274847653]\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!ls"
      ],
      "metadata": {
        "id": "r5OtdfsRlVUa",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "82f590c9-8285-4fc6-d78b-30bcf180d86d"
      },
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "panlex-20230401-csv  panlex-20230401-csv.zip  panlex-experiment.ipynb\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# -n: never overwrite existing files, so a re-run neither prompts for input nor re-extracts.\n",
        "!unzip -n panlex-20230401-csv.zip"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4jE1cwCHBPbN",
        "outputId": "83422808-40da-4194-9438-bfe3cfe542ee"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Archive:  panlex-20230401-csv.zip\n",
            "   creating: panlex-20230401-csv/\n",
            "  inflating: panlex-20230401-csv/source.csv  \n",
            "  inflating: panlex-20230401-csv/denotation_class.csv  \n",
            "  inflating: panlex-20230401-csv/format.csv  \n",
            "  inflating: panlex-20230401-csv/denotation_prop.csv  \n",
            "  inflating: panlex-20230401-csv/expr.csv  \n",
            "  inflating: panlex-20230401-csv/langvar_cldr_char.csv  \n",
            "  inflating: panlex-20230401-csv/definition.csv  \n",
            "  inflating: panlex-20230401-csv/source_format.csv  \n",
            "  inflating: panlex-20230401-csv/lang_code.csv  \n",
            "  inflating: panlex-20230401-csv/source_langvar.csv  \n",
            "  inflating: panlex-20230401-csv/langvar.csv  \n",
            "  inflating: panlex-20230401-csv/langvar_char.csv  \n",
            "  inflating: panlex-20230401-csv/meaning_prop.csv  \n",
            "  inflating: panlex-20230401-csv/LICENSE.txt  \n",
            "  inflating: panlex-20230401-csv/source_license.csv  \n",
            "  inflating: panlex-20230401-csv/meaning_class.csv  \n",
            "  inflating: panlex-20230401-csv/meaning.csv  \n",
            "  inflating: panlex-20230401-csv/denotation.csv  \n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Load required tables"
      ],
      "metadata": {
        "id": "hTCbPoOz8t80"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# keep_default_na=False: keep expression texts such as 'nan', 'NA' or 'null' as literal\n",
        "# strings instead of letting pandas convert them to NaN.\n",
        "# NOTE(review): assumes the 'id' and 'langvar' columns never contain empty fields - confirm.\n",
        "expr_df = pd.read_csv(\"./panlex-20230401-csv/expr.csv\", keep_default_na=False)"
      ],
      "metadata": {
        "id": "aKH-X8qcBTpW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "denotation_df = pd.read_csv(\"./panlex-20230401-csv/denotation.csv\")"
      ],
      "metadata": {
        "id": "qdGroLwVNjOa"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "langvar_df = pd.read_csv(\"./panlex-20230401-csv/langvar.csv\")"
      ],
      "metadata": {
        "id": "6L9thrY7BTtq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Generate Dictionary"
      ],
      "metadata": {
        "id": "ILg4xBNf8_L8"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def create_translation_dictionary(\n",
        "    src_lang_code,\n",
        "    src_var_code,\n",
        "    dst_lang_code,\n",
        "    dst_var_code,\n",
        "):\n",
        "  \"\"\"Build a word-level translation dictionary between two PanLex language varieties.\n",
        "\n",
        "  Expressions of the two varieties are paired whenever they are attached to\n",
        "  the same meaning. Relies on the module-level ``langvar_df``, ``expr_df``\n",
        "  and ``denotation_df`` tables being loaded beforehand.\n",
        "\n",
        "  Args:\n",
        "    src_lang_code: value matched against ``langvar_df['lang_code']`` (source side).\n",
        "    src_var_code: value matched against ``langvar_df['var_code']`` (source side).\n",
        "    dst_lang_code: value matched against ``langvar_df['lang_code']`` (target side).\n",
        "    dst_var_code: value matched against ``langvar_df['var_code']`` (target side).\n",
        "\n",
        "  Returns:\n",
        "    dict mapping each source expression text to one target expression text.\n",
        "    When a source text has several candidate translations, only the first\n",
        "    pairing produced by the join is kept.\n",
        "\n",
        "  Raises:\n",
        "    ValueError: if either (lang_code, var_code) pair does not match exactly\n",
        "      one row of ``langvar_df``.\n",
        "  \"\"\"\n",
        "  # Resolve both ids first so a bad language code fails before any heavy join.\n",
        "  src_langvar_id = _find_langvar_id(src_lang_code, src_var_code)\n",
        "  dst_langvar_id = _find_langvar_id(dst_lang_code, dst_var_code)\n",
        "  src_txt_df = _meaning_txt_frame(src_langvar_id, 'src_txt')\n",
        "  dst_txt_df = _meaning_txt_frame(dst_langvar_id, 'dst_txt')\n",
        "  # Inner join on the shared meaning, then keep one translation per source text.\n",
        "  src_to_dst_df = pd.merge(src_txt_df, dst_txt_df, on='meaning').drop_duplicates('src_txt')\n",
        "  # Vectorised replacement for a row-by-row iterrows() loop; keys are already unique.\n",
        "  return dict(zip(src_to_dst_df['src_txt'], src_to_dst_df['dst_txt']))\n",
        "\n",
        "\n",
        "def _find_langvar_id(lang_code, var_code):\n",
        "  \"\"\"Return the id of the single ``langvar_df`` row matching (lang_code, var_code).\"\"\"\n",
        "  matches = langvar_df.loc[\n",
        "      (langvar_df['lang_code'] == lang_code) & (langvar_df['var_code'] == var_code),\n",
        "      'id',\n",
        "  ]\n",
        "  if len(matches) != 1:\n",
        "    # Same exception type as Series.item(), but with an actionable message.\n",
        "    raise ValueError(\n",
        "        f'expected exactly one langvar row for lang_code={lang_code!r}, '\n",
        "        f'var_code={var_code!r}; found {len(matches)}'\n",
        "    )\n",
        "  return matches.item()\n",
        "\n",
        "\n",
        "def _meaning_txt_frame(langvar_id, txt_column):\n",
        "  \"\"\"Return a (``meaning``, ``txt_column``) frame with one row per denotation of a language variety.\"\"\"\n",
        "  lang_expr_df = expr_df[expr_df['langvar'] == langvar_id]\n",
        "  lang_denotation_df = denotation_df[denotation_df['expr'].isin(lang_expr_df['id'])]\n",
        "  merged_df = pd.merge(lang_denotation_df, lang_expr_df, left_on='expr', right_on='id')\n",
        "  return merged_df.rename(columns={'txt': txt_column})[['meaning', txt_column]]\n",
        "\n",
        "def create_save_translation_dictionary(\n",
        "    src_lang,\n",
        "    src_lang_code,\n",
        "    src_var_code,\n",
        "    dst_lang,\n",
        "    dst_lang_code,\n",
        "    dst_var_code,\n",
        "):\n",
        "  \"\"\"Create a translation dictionary and pickle it as ``{src_lang}_to_{dst_lang}.pkl``.\n",
        "\n",
        "  The file is written relative to the current working directory.\n",
        "\n",
        "  Args:\n",
        "    src_lang: label of the source language; used only in the file name.\n",
        "    src_lang_code: forwarded to ``create_translation_dictionary``.\n",
        "    src_var_code: forwarded to ``create_translation_dictionary``.\n",
        "    dst_lang: label of the target language; used only in the file name.\n",
        "    dst_lang_code: forwarded to ``create_translation_dictionary``.\n",
        "    dst_var_code: forwarded to ``create_translation_dictionary``.\n",
        "\n",
        "  Returns:\n",
        "    The dictionary that was written to disk.\n",
        "  \"\"\"\n",
        "  src_to_dst_dict = create_translation_dictionary(\n",
        "      src_lang_code=src_lang_code,\n",
        "      src_var_code=src_var_code,\n",
        "      dst_lang_code=dst_lang_code,\n",
        "      dst_var_code=dst_var_code,\n",
        "  )\n",
        "  filename = f'{src_lang}_to_{dst_lang}.pkl'\n",
        "  with open(filename, 'wb') as fp:\n",
        "    pickle.dump(src_to_dst_dict, fp)\n",
        "  # Report success only once the file has been flushed and closed.\n",
        "  print(f'[INFO] dictionary {filename} saved successfully.')\n",
        "  return src_to_dst_dict\n",
        "\n",
        "def generate_word_translator_pickle_files(\n",
        "  language_details,\n",
        "  base_lang,\n",
        "  base_language_details,\n",
        "):\n",
        "  \"\"\"Pickle dictionaries in both directions between the base language and every other language.\n",
        "\n",
        "  Args:\n",
        "    language_details: mapping of language label -> {'lang_code': ..., 'var_code': ...}.\n",
        "    base_lang: key into ``base_language_details`` naming the pivot language.\n",
        "    base_language_details: mapping with the same shape as ``language_details``.\n",
        "  \"\"\"\n",
        "  for lang, lang_info in language_details.items():\n",
        "    base_info = base_language_details[base_lang]\n",
        "    base_side = (base_lang, base_info['lang_code'], base_info['var_code'])\n",
        "    lang_side = (lang, lang_info['lang_code'], lang_info['var_code'])\n",
        "    # First base_lang -> lang, then lang -> base_lang.\n",
        "    for src_side, dst_side in ((base_side, lang_side), (lang_side, base_side)):\n",
        "      src_label, src_code, src_var = src_side\n",
        "      dst_label, dst_code, dst_var = dst_side\n",
        "      create_save_translation_dictionary(\n",
        "          src_lang=src_label,\n",
        "          src_lang_code=src_code,\n",
        "          src_var_code=src_var,\n",
        "          dst_lang=dst_label,\n",
        "          dst_lang_code=dst_code,\n",
        "          dst_var_code=dst_var,\n",
        "      )"
      ],
      "metadata": {
        "id": "SwwZWGEIX7ez"
      },
      "execution_count": 43,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# test case\n",
        "# src_to_dst_dict = create_translation_dictionary(\n",
        "#     src_lang_code='sun',\n",
        "#     src_var_code=0,\n",
        "#     dst_lang_code='ind',\n",
        "#     dst_var_code=0,\n",
        "# )"
      ],
      "metadata": {
        "id": "RAZ0CxbebN76"
      },
      "execution_count": 23,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# test case\n",
        "# create_save_translation_dictionary(\n",
        "#     src_lang='sun',\n",
        "#     src_lang_code='sun',\n",
        "#     src_var_code=0,\n",
        "#     dst_lang='ind',\n",
        "#     dst_lang_code='ind',\n",
        "#     dst_var_code=0,\n",
        "# )"
      ],
      "metadata": {
        "id": "WRkdfvy54kW_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "language_details = {\n",
        "    'abs' : {'lang_code' : 'abs', 'var_code' : 0},\n",
        "    'btk' : {'lang_code' : 'bya', 'var_code' : 0},\n",
        "    'bew' : {'lang_code' : 'bew', 'var_code' : 0},\n",
        "    'bhp' : {'lang_code' : 'bhp', 'var_code' : 0},\n",
        "    'jav' : {'lang_code' : 'jav', 'var_code' : 0},\n",
        "    'mad' : {'lang_code' : 'mad', 'var_code' : 0},\n",
        "    'mak' : {'lang_code' : 'mfp', 'var_code' : 0},\n",
        "    'min' : {'lang_code' : 'min', 'var_code' : 0},\n",
        "    'mui' : {'lang_code' : 'mui', 'var_code' : 0},\n",
        "    'rej' : {'lang_code' : 'rej', 'var_code' : 0},\n",
        "    'sun' : {'lang_code' : 'sun', 'var_code' : 0},\n",
        "}\n",
        "\n",
        "base_language_details = {\n",
        "    'ind' : {'lang_code' : 'ind', 'var_code' : 0},\n",
        "}\n",
        "\n",
        "generate_word_translator_pickle_files(\n",
        "  language_details = language_details,\n",
        "  base_lang = 'ind',\n",
        "  base_language_details = base_language_details,\n",
        ")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "VNa9z6S82IMn",
        "outputId": "e2eb2d95-36bf-439c-e887-05a7e7e68560"
      },
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[INFO] dictionary ind_to_abs.pkl saved successfully.\n",
            "[INFO] dictionary abs_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_btk.pkl saved successfully.\n",
            "[INFO] dictionary btk_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_bew.pkl saved successfully.\n",
            "[INFO] dictionary bew_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_bhp.pkl saved successfully.\n",
            "[INFO] dictionary bhp_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_jav.pkl saved successfully.\n",
            "[INFO] dictionary jav_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_mad.pkl saved successfully.\n",
            "[INFO] dictionary mad_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_mak.pkl saved successfully.\n",
            "[INFO] dictionary mak_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_min.pkl saved successfully.\n",
            "[INFO] dictionary min_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_mui.pkl saved successfully.\n",
            "[INFO] dictionary mui_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_rej.pkl saved successfully.\n",
            "[INFO] dictionary rej_to_ind.pkl saved successfully.\n",
            "[INFO] dictionary ind_to_sun.pkl saved successfully.\n",
            "[INFO] dictionary sun_to_ind.pkl saved successfully.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "with open('ind_to_rej.pkl', 'rb') as fp:\n",
        "    translator = pickle.load(fp)\n",
        "    print('translator dictionary')\n",
        "    print(translator)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7Vc_cWHj2IKE",
        "outputId": "5aa345f0-35c0-4d2a-ca77-ac67f03bb4a0"
      },
      "execution_count": 59,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "translator dictionary\n",
            "{'enam': 'enum', 'air': 'biyoa', 'hitam': 'məluəw', 'hujan': 'ujən', 'berjalan': 'bəpanəw', 'aku': 'uku', 'saya': 'uku', 'zon': 'uku', 'kuning': 'kuniŋ', 'kuniŋ': 'kuniŋ', 'semua': 'kətə', 'sepuluh': 'poloaʔ', 'satu': 'do', 'ini': 'dio', 'anjing': 'kuyuʔ', 'anjiŋ': 'kuyuʔ', 'mati': 'matiɛ', 'mata': 'maṱəy', 'ikan': 'kan', 'tangan': 'taŋən', 'taŋan': 'taŋən', 'malam': 'kəlmən', 'kecil': 'titiʔ', 'dan': 'ŋen [dan]', 'burung': 'buruŋ', 'buruŋ': 'buruŋ', 'angin': 'aŋin', 'aŋin': 'aŋin', 'datang': 'təko', 'serta': 'təko', 'ikut': 'təko', 'dataŋ': 'təko', 'mendataŋ': 'təko', 'mendataŋi': 'təko', 'mendekati': 'təko', 'jalan': 'dalən', 'kepala': 'uləw', 'telinga': 'tiʔuʔ', 'kupiŋ': 'tiʔuʔ', 'teliŋa': 'tiʔuʔ', 'mulut': 'bebea[ʔ]', 'api': 'opoy', 'awan': 'awan', 'besar': 'loy', 'rambut': 'buʔ', 'berenang': 'bər̃naŋ', 'berenaŋ': 'bər̃naŋ', 'bulan': 'bulən', 'dingin': 'səŋãʔ', 'diŋin': 'səŋãʔ', 'langit': 'lɛŋɛt', 'laŋit': 'lɛŋɛt', 'laut': 'lautʔ', 'danau': 'danuəw', 'bagus': 'baiʔ ɑsəynə [rasanya]', 'baik': 'baiʔ ɑsəynə [rasanya]', 'orang': 'tun', 'oraŋ': 'tun', 'kotor': 'kotor', 'hidup': 'idup', 'tahu': 'nam', 'hati': 'atiɛ', 'nama': 'gɛn', 'tertawa': 'kətawəy', 'ketawa': 'kətawəy', 'mendengar': 'təmŋoa', 'mendeŋar': 'təmŋoa', 'bintang': 'bitaŋ', 'bintaŋ': 'bitaŋ', 'sakit': 'gis', 'mencium': 'səmiyum', 'belok': 'beloʔ', 'bélok': 'beloʔ', 'kiri': 'kiduəw', 'abu': 'abəw', 'kulit': 'kaʔ', 'belulaN': 'ka7', 'punggung': 'kədoŋ', 'belakaŋ': 'kədoŋ', 'perut': 'tənĩyɛ̃', 'usus': 'usus', 'isi perut': 'usus', 'lambuŋ': 'usus', 'usus besar': 'usus', 'usus kecil': 'usus', 'tulang': 'təlan', 'tulaŋ': 'təlan', 'darah': 'dalɨaʔ', 'membeli': 'mənokoa', 'tidur': 'tidoa', 'ular': 'duŋ', 'gigi': 'epen', 'pasir': 'bəniɛ', 'terbang': 'tər̃baŋ', 'terbaŋ': 'tər̃baŋ', 'bunga': 'buŋə̃y', 'buŋa': 'buŋə̃y', 'tali': 'tiləy', 'hari': 'biləy', 'di': 'di tempat', 'tujuh': 'tujua', 'rumput': 'dukut', 'dua': 'duəy', 'tiga': 'tələw', 'empat': 'pat', 'lima': 'lemo', 'tahun': 'taun', 'daging': 
'dagiŋ', 'dagiŋ': 'dagiŋ', 'laki-laki': 'səboŋ', 'delapan': 'delapan', 'sembilan': 'semilan', 'buah': 'boaʔ', 'buah-buahan': 'boaʔ', 'sebiji buah': 'boaʔ', 'suami': 'aduʔ', 'baru': 'bələw', 'tipis': 'mipis', 'tanah': 'pitaʔ', 'dekat': 'paʔaʔ', 'hijau': 'ijo', 'merah': 'milɨaʔ', 'mérah': 'milɨaʔ', 'putih': 'puteaʔ', 'kalian': 'kumu kətə', 'kamu': 'kumu kətə', 'anda': 'kumu kətə', 'eŋkau': 'kumu kətə', 'saudara': 'ko', 'perempuan': 'biɛ', 'melihat': 'kəməleaʔ [kəmleaʔ]', 'lihat': 'k3m3lea7', 'tikus': 'tikus', 'jatuh': 'us', 'makan': 'makan nasi', 'tebal': 'kəboa', 'apa': 'jano', 'benar': 'bənɨã', 'kanan': 'kanən', 'ayah': 'baʔ', 'bapak': 'baʔ', 'bunda': 'inoʔ', 'ibu': 'inoʔ', 'anak': 'anaʔ', 'sayap': 'sayəp', 'panas': 'panəs', 'lain': 'leyen', 'bukan': '[bukan]', 'tidak': '[bukan]', 'itu': 'doʔo', 'kayu': 'kiyuəw', 'bagaimana': 'awe ipə', 'nyamuk': 'ñomoʔ', 'ñamuk': 'ñomoʔ', 'telur': 'tənõã', 'telor': 'tənõã', 'jahat': 'kidɛʔ', 'gigit': 'bətəŋə̃t', 'meŋgigit': 'bətəŋə̃t', 'bernafas': 'dia bernapas', 'bernapas': 'dia bernapas', 'kami': 'itə', 'kita': 'itə', 'nen': 'it3', 'dia': 'si', '[d]ia': 'si', 'mereka': 'toboʔo', 'meréka': 'toboʔo', 'bulu': 'buləw', 'hidung': 'ñuŋ', 'hiduŋ': 'ñuŋ', 'lidah': 'dilɨaʔ', 'kaki': 'kekea', 'leher': 'kagən', 'léhér': 'kagən', 'siapa': 'api', 'meniup': 'bətiup [bertiup]', 'bertiup': 'bətiup [bertiup]', 'membakar': 'məŋəmbəm', 'memilih': 'məmeliaʔ', 'memotong': 'təm[ə]toʔ', 'memotoŋ': 'təm[ə]toʔ', 'menetak': 'təm[ə]toʔ', 'menggali': 'məmikoa', 'mimpi': 'bəmipəy', 'bermimpi': 'bəmipəy', 'tumbuh': 'idup', 'kilat': 'kilat', 'berbaring': 'məŋguleʔ-gulɛʔ', 'berbariŋ': 'məŋguleʔ-gulɛʔ', 'berbicara': 'məŋɛ̃cɛʔ', 'berkata': 'məŋɛ̃cɛʔ', 'kapan': 'təŋɛ̃n', 'bila': 'təŋɛ̃n', 'di mana': 'naʔ ipə', 'dimana': 'naʔ ipə', 'naik': 'kənɛʔ', 'memanjat': 'kənɛʔ', 'mendaki': 'kənɛʔ', 'menjahit': 'mə̃nɛ̃t', 'kering': 'kəʔiŋ', 'keriŋ': 'kəʔiŋ', 'menembak': 'mənimbaʔ', 'menémbak': 'mənimbaʔ', 'duduk': 'təmõt', 'menduduk': 'təmõt', 'meludah': 'bətəkɛʔ', 
'meludahi': 'bətəkɛʔ', 'membengkak': 'məmbəkoʔ', 'membeŋkak': 'məmbəkoʔ', 'basah': 'lacɨaʔ', 'tua': 'tuəy', 'lama': 'tuəy', 'pendek': 'pɛndɛʔ', 'péndék': 'pɛndɛʔ', 'rumah': 'umɨaʔ', 'garam': 'siləy', 'ekor': 'ikoa', 'ékor': 'ikoa', 'debu': 'dəbʡw', 'batu': 'butəw', 'kabut': 'kabut', 'busuk': 'buʔuʔ', 'kutu': 'gutəw', 'tuma': 'gut3w', 'hutan': 'imo', 'rimba': 'imo', 'daun': 'dan', 'duan': 'dan', 'membunuh': 'monoaʔ', 'cacing': 'gəloŋ', 'caciŋ': 'gəloŋ', 'akar': 'balət', 'urat': 'balət', 'takut': 'sabən', 'atap': 'atəp', 'mencuri': 'maliŋ', 'asap': 'asəp', 'panjang': 'pañjaŋ', 'panjaŋ': 'pañjaŋ', 'membuka': 'məmukaʔ', 'susu': 'susəw', 'dada': 'susəw', 'buah dada': 'sus3w', '%payudara': 'sus3w', 'minum': 'makanan kecil', 'memukul': 'məlopoaʔ', 'menumbuk': 'mənutuʔ pae', 'mengalir': 'məŋaloa', 'meŋalir': 'məŋaloa', 'berat': 'bəʔət', 'memikir': 'mɛkɛr', 'berfikir': 'mɛkɛr', 'bahu': 'baəw', 'kalau': 'kaləw', 'mengikat': 'məŋɛ̃kɛt', 'meŋikat': 'məŋɛ̃kɛt', 'dahan': 'pələpɨaʔ', 'luas': 'libɨa', 'lebar': 'libɨa', 'lébar': 'libɨa', 'memegang': 'məməgoŋ', 'memegaŋ': 'məməgoŋ', 'sempit': 'səpit', 'jarum': 'dolom', 'muntah': 'mũtɨaʔ', 'isteri': 'prəpuan', 'menangis': 'məŋinoy', 'menaŋis': 'məŋinoy', 'mangunyah': 'bətinaʔ', 'meŋuñah': 'bətinaʔ', 'memasak': 'məŋəsaʔ', 'mengisap': 'məŋɛsɛp', 'meŋisap': 'məŋɛsɛp', 'menguap': 'sətiyam', 'meŋguap': 'sətiyam', 'berdiri': 'təgaʔ', 'berburu': 'bər̃burəw', 'menikam': 'təmikəm', 'menusuk': 'təmikəm', 'menggaruk': 'gəmãũt', 'meŋgarut': 'gəmãũt', 'membelah': 'mlay', 'tajam': 'tajəm', 'tumpul': 'topoa', 'bekerja': 'bəkər̃jo', 'menanam': 'təmanəm', 'memeras': 'mĩyõaʔ', 'meremas': 'mĩyõaʔ', 'melempar': 'məlũʔ', 'melémparkan': 'məlũʔ', 'lemak': 'gəmũʔ', 'laba-laba': 'lawɨaʔ lawɨaʔ', 'guntur': 'səmitoa', 'gemuruh': 'səmitoa', 'malu': 'sɛlɛkʔ', 'bersembunyi': 'məñũʔũk', 'sembuñi': 'məñũʔũk', '[di] dalam': 'naʔ ləm', 'di dalam': 'naʔ ləm', 'di atas': 'naʔ das', '[di] atas': 'naʔ das', 'di bawah': 'naʔ bɨaʔ', 'jauh': 'oaʔ', 'menghitung': 'mərɛkɛn', 
'meŋhituŋ': 'mərɛkɛn', '%insan': 'tun', 'oraN': 'tun', 'anZiN': 'kuyu7', 'anjiN': 'kuyu7', 'tulaN': 't3lan', '%kupiN': 'ti7u7', 'teliNa': 'ti7u7', 'hiduN': '5uN', 'taNan': 'taN3n', 'mendeNar': 't3m3Noa', 'Zalan': 'dal3n', 'jalaN': 'dal3n', 'dataN': 't3ko', 'bintaN': 'bitaN', '%nama': 'gEn'}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "nM2I7qGozYIw"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}